Source Code of org.terrier.structures.InvertedIndex

/*
 * Terrier - Terabyte Retriever 
 * Webpage: http://terrier.org 
 * Contact: terrier{a.}dcs.gla.ac.uk
 * University of Glasgow - School of Computing Science
 * http://www.gla.ac.uk/
 * 
 * The contents of this file are subject to the Mozilla Public License
 * Version 1.1 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
 * the License for the specific language governing rights and limitations
 * under the License.
 *
 * The Original Code is InvertedIndex.java.
 *
 * The Original Code is Copyright (C) 2004-2011 the University of Glasgow.
 * All Rights Reserved.
 *
 * Contributor(s):
 *   Douglas Johnson <johnsoda{a.}dcs.gla.ac.uk> (original author)
 *   Vassilis Plachouras <vassilis{a.}dcs.gla.ac.uk> 
 *   Craig Macdonald <craigm{a.}dcs.gla.ac.uk>
 */
package org.terrier.structures;
import java.io.IOException;


import org.apache.log4j.Logger;


import org.terrier.compression.BitIn;
import org.terrier.compression.BitInSeekable;
import org.terrier.structures.postings.BasicIterablePosting;
import org.terrier.structures.postings.IterablePosting;
import org.terrier.utility.FieldScore;
import org.terrier.utility.io.WrappedIOException;
/**
 * This class implements the inverted index 
 * for performing retrieval, with field information
 * optionally.
 * @author Douglas Johnson, Vassilis Plachouras, Craig Macdonald
 */
public class InvertedIndex extends BitPostingIndex {
  /** The logger used for the Lexicon */
  protected static final Logger logger = Logger.getLogger(InvertedIndex.class);
  
  /** This is used during retrieval for a rough guess sizing of the temporaryTerms
    * arraylist in getDocuments(). The higher this value, the less chance that the
    * arraylist will have to be grown (growing is expensive), however more memory
    * may be used unnecessarily. */
  public static final double NORMAL_LOAD_FACTOR = 1.0;
  /** This is used during retrieval for a rough guess sizing of the temporaryTerms
    * arraylist in getDocuments() - retrieval with Fields. The higher this value, 
    * the less chance that the arraylist will have to be grown (growing is expensive), 
    * however more memory may be used unnecessarily. */
  public static final double FIELD_LOAD_FACTOR = 1.0;
  /** Indicates whether field information is used.*/
  final boolean useFieldInformation = FieldScore.USE_FIELD_INFORMATION;
  
  protected DocumentIndex doi;
  /** 
   * Get the BitFiles
   */
  public BitInSeekable[] getBitFiles() {
    return super.file;
  }
  /**
   * Construct an instance of the class with
   * @param index
   * @param structureName
   * @throws IOException
   */
  public InvertedIndex(Index index, String structureName) throws IOException
  {
    this(index, structureName, index.getDocumentIndex());
  }
  /**
   * Construct an instance of the class with
   * @param index
   * @param structureName
   * @param _doi
   * @throws IOException
   */
  public InvertedIndex(Index index, String structureName, DocumentIndex _doi) throws IOException
  {
    super(index, structureName, BasicIterablePosting.class);
    doi = _doi;
  }
  /**
   * Construct an instance of the class with
   * @param index
   * @param structureName
   * @param _doi
   * @param postingClass
   * @throws IOException
   */
  public InvertedIndex(Index index, String structureName, DocumentIndex _doi, Class<? extends IterablePosting> postingClass) throws IOException
  {
    super(index, structureName, postingClass);
    doi = _doi;
  }


  /** 
   * Print out the Inverted Index
   */
  public void print()
  {
    throw new UnsupportedOperationException("InvIndex.print() is missing. Use IndexUtil instead.");
  }
  
  @Override
  public IterablePosting getPostings(BitIndexPointer pointer) throws IOException {
    final BitIn _file = this.file[pointer.getFileNumber()].readReset(pointer.getOffset(), pointer.getOffsetBits());
    IterablePosting rtr = null;
    try{
      rtr = (fieldCount > 0) 
        ? postingConstructor.newInstance(_file, pointer.getNumberOfEntries(), doi, fieldCount)
        : postingConstructor.newInstance(_file, pointer.getNumberOfEntries(), doi);
    } catch (Exception e) {
      throw new WrappedIOException(e);
    }
    return rtr;
  }
  /** 
   * Get the documents for the specified term (lexicon entry for the term)
   */
  public int[][] getDocuments(LexiconEntry le) {
    return getDocuments((BitIndexPointer)le);
  }
  /** 
   * Get the documents for for the posting list using the pointer given
   */
  public int[][] getDocuments(BitIndexPointer pointer) {
    if (pointer==null)
      return null;
    final boolean loadTagInformation = FieldScore.USE_FIELD_INFORMATION;
    final int count = pointer.getNumberOfEntries();
    try{
      final BitIn file = this.file[pointer.getFileNumber()].readReset(pointer.getOffset(), pointer.getOffsetBits());
      int[][] documentTerms = null;
      if (loadTagInformation) { //if there are tag information to process      
        documentTerms = new int[2+fieldCount][count];
        documentTerms[0][0] = file.readGamma() - 1;
        documentTerms[1][0] = file.readUnary();
        for (int f = 0; f < fieldCount; f++) {
          documentTerms[2+f][0] = file.readUnary() - 1;
        }        


        for (int i = 1; i < count; i++) {          
          documentTerms[0][i]  = file.readGamma() + documentTerms[0][i - 1];
          documentTerms[1][i]  = file.readUnary();
          for (int f = 0; f < fieldCount; f++) {
            documentTerms[2+f][i] = file.readUnary() - 1;
          }
        }        
      } else { //no tag information to process          
        documentTerms = new int[2][count];
        //new    
        documentTerms[0][0] = file.readGamma() - 1;
        documentTerms[1][0] = file.readUnary();
        for(int i = 1; i < count; i++){               
          documentTerms[0][i] = file.readGamma() + documentTerms[0][i - 1];
          documentTerms[1][i] = file.readUnary();
        }
      }
      file.close();
      return documentTerms;
    } catch (IOException ioe) {
      logger.error("Problem reading inverted index", ioe);
      return null;
    }
  }


}
Source Code of org.terrier.structures.InvertedIndex

Related Classes of org.terrier.structures.InvertedIndex